import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
## 1.Cleaning up the dataset by renaming columns and removing unnecessary rows.
## 2.Converting date columns to the appropriate format.
## 3.Converting relevant columns to numeric or boolean types for analysis.
## Load The Data Set
file_path = '/Users/cactusjack/Downloads/Delta Airlines Loyalty Customer Raw Data - Frequent Flier Raw.csv'
delta_data = pd.read_csv(file_path)
file_path1 = '/Users/cactusjack/Downloads/Delta Airlines Loyalty Customer Raw Data - Flier Upcoming Flights Raw.csv'
delta_upcoming_flights = pd.read_csv(file_path1)
delta_upcoming_flights.head()
| Join Date | Frequent Flier Number | First Name | Last Name | Flight Number | Flight Date | Departing City | Arrival City | Departure Time | Arrival Time | Departure Time Zone | Arrival Time Zone | Cost | Car Rental Addon $ | Hotel Addon $ | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2009-10-07 | 9531726184 | Megan | Brown | DL7263 | 2024-11-04 | Berlin | Miaml | 4:38 | 9:38 | UTC | CST | 998.55 BRL | 197.5 BRL | 419.24 BRL |
| 1 | 1996-07-29 | 7654154528 | William | Lee | DL685k | 2024-11-04 | Dubai | Hong Kong | 22:38 | 8:38 | UTC | UTk | 283.62 AUD | 256.04 AUD | 302 |
| 2 | 1990-03-11 | 1035783281 | Courtney | Valdez | DL8569 | 2025-01-03 | Houston | Paris | 0:38 | 10:38 | PST | UTC | 335.82 GBP | 117.56 GBP | 263.57 GBP |
| 3 | 2002-12-16 | 787669065 | Erik | Schneider | DL7894 | 2025-07-21 | New York | Miami | 23:38 | 10:38 | EST | PSy | 686.64 USD | 77.29 USD | 197.77 USD |
| 4 | NaN | 2871854617 | Alexander | Johnson | DL6365 | 2025-01-21 | Dallas | New York | 2:38 | 4:38 | CST | EST | 601.82 USD | 161.71 USD | 197.69 USD |
new_column_names = ['Record_Creation', 'Join_Date', 'First_Name', 'Last_Name', 'Frequent_Flier_Number', 'Flight_Number', 'Departure_Airport', 'Arrival_Airport', 'Flight_Date', 'Flight_Time', 'Seat_Number', 'Meal_Preference', 'Checked_Bags', 'Special_Assistance', 'Flight_Status']
delta_upcoming_flights.columns = new_column_names
delta_upcoming_flights = delta_upcoming_flights.dropna(subset=['Frequent_Flier_Number', 'Flight_Number'])
delta_upcoming_flights.dtypes
Record_Creation object Join_Date int64 First_Name object Last_Name object Frequent_Flier_Number object Flight_Number object Departure_Airport object Arrival_Airport object Flight_Date datetime64[ns] Flight_Time object Seat_Number object Meal_Preference object Checked_Bags object Special_Assistance object Flight_Status object dtype: object
delta_upcoming_flights['Flight_Date'] = pd.to_datetime(delta_upcoming_flights['Flight_Date'], errors='coerce')
delta_upcoming_flights['Checked_Bags'] = pd.to_numeric(delta_upcoming_flights['Checked_Bags'], errors='coerce')
delta_upcoming_flights['Meal_Preference'].fillna('No Preference', inplace=True)
delta_upcoming_flights['Seat_Number'].fillna('Unassiged', inplace=True)
delta_upcoming_flights.head()
| Record_Creation | Join_Date | First_Name | Last_Name | Frequent_Flier_Number | Flight_Number | Departure_Airport | Arrival_Airport | Flight_Date | Flight_Time | Seat_Number | Meal_Preference | Checked_Bags | Special_Assistance | Flight_Status | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2009-10-07 | 9531726184 | Megan | Brown | DL7263 | 2024-11-04 | Berlin | Miaml | 2024-11-01 04:38:00 | 9:38 | UTC | CST | NaN | 197.5 BRL | 419.24 BRL |
| 1 | 1996-07-29 | 7654154528 | William | Lee | DL685k | 2024-11-04 | Dubai | Hong Kong | 2024-11-01 22:38:00 | 8:38 | UTC | UTk | NaN | 256.04 AUD | 302 |
| 2 | 1990-03-11 | 1035783281 | Courtney | Valdez | DL8569 | 2025-01-03 | Houston | Paris | 2024-11-01 00:38:00 | 10:38 | PST | UTC | NaN | 117.56 GBP | 263.57 GBP |
| 3 | 2002-12-16 | 787669065 | Erik | Schneider | DL7894 | 2025-07-21 | New York | Miami | 2024-11-01 23:38:00 | 10:38 | EST | PSy | NaN | 77.29 USD | 197.77 USD |
| 4 | NaN | 2871854617 | Alexander | Johnson | DL6365 | 2025-01-21 | Dallas | New York | 2024-11-01 02:38:00 | 4:38 | CST | EST | NaN | 161.71 USD | 197.69 USD |
file_path2 = '/Users/cactusjack/Downloads/Delta Airlines Loyalty Customer Raw Data - 3 Month Upcoming Flights Raw.csv'
delta_3month_upcoming_flight = pd.read_csv(file_path2)
delta_3month_upcoming_flight.head()
| Flight_ID | Flight Date | Departure City | Arrival City | Departure Time | Arrival Time | Departure Time Zone | Arrival Time Zone | Airline | Flight Number | ... | Baggage Allowance | Flight Duration | Layovers | Cancellation Policy | Booking Window | Meal Options | In-Flight Entertainment | Aircraft Type | Flight Status | Loyalty Points Earned | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | FL22499 | 2024-09-11 | Dallas | Berlin | 22:4j | 1:46 | NaN | UTC | Delta | Un5086 | ... | 2 bags | 8 hours | NaN | Refundable | 6 days | Gluten-Free | Movies | Airbus A380 | On Time | 2274 |
| 1 | FL62773 | 2024-11-06 | Sydney | Hong Kong | 5:46 | 12:46 | UTC | UTC | Lufthansa | Qa2994 | ... | 2 bags | 6 hours | Toronto for 3 hours | Refundable | 9 days | Non-Vegetarian | Movies | Boeing 787 | On Time | 4337 |
| 2 | FL96338 | 2024-09-12 | Los Angeleu | Atlanta | 6:46 | 17:46 | PST | MST | Lufthansa | Em9455 | ... | 2 bags | 15 hourj | NaN | Non-Refundable | 18 days | Gluten-Free | None | Airbus A380 | Delayed | 164 |
| 3 | FL85371 | 2024-09-20 | San Francisco | Tokyo | 7:46 | 13:4l | UTC | UTC | British Airways | Br6054 | ... | 1 bags | 15 hours | NaN | NaN | 19 days | Non-Vegetarian | Movies | Airbus A320 | Delayed | 2711 |
| 4 | FL65244 | 2024-10-02 | Miami | San Francisco | 10:46 | 20:46 | PST | UTC | Air Canada | Br3533 | ... | 3 bags | 9 hours | Hong Kong for 5 hours | Non-Refundable | 10 days | Non-Vegetarian | Movies | Airbus A320 | Scheduled | 2451 |
5 rows × 24 columns
delta_data.head()
| Unnamed: 0 | MAKE A COPY TO BEGIN YOUR WORK. THIS IS A READ ONLY FILE | Unnamed: 2 | Unnamed: 3 | Unnamed: 4 | Unnamed: 5 | Unnamed: 6 | Unnamed: 7 | Unnamed: 8 | Unnamed: 9 | Unnamed: 10 | Unnamed: 11 | Unnamed: 12 | Unnamed: 13 | Unnamed: 14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Record Creation | Frequent Flier Number | Join Date | First Name | Last Name | Inquiry Type | Most Recent Flight # | Lounge Used? | Departing Airport | Arrival Airport | Planned Snack? | Additional Snack? | # of Included Checked Bags | Total # of Checked Bags | Flight Delayed? |
| 1 | 2024-02-27 | 7234617746 | 1988/07/08 | debbie | spears | Flight Status | G35206241 | FALSE | DFW | DEN | FALSE | TRUE | 0 | 16 | TRUE |
| 2 | 2021-03-07 | 7234617746 | 1988/07/08 | debbie | Spears | New Flight | V81311927 | TRUE | JFK | SFO | FALSE | TRUE | 1 | 21 | FALSE |
| 3 | 2023-01-31 | 7234617746 | 07/08/1988 | debbie | spears | Cancel Flight | H82295055 | FALSE | MIA | ORD | FALSE | FALSE | 7 | 5 | TRUE |
| 4 | 2021-08-21 | 551121699 | 06/16/2022 | kyle | Boone | New Flight | E39886834 | TRUE | ttt | NaN | FALSE | TRUE | NaN | 8 | FALSE |
## Clean and Reformat the dataset
## Rename the columns
## Dropping the row
new_columns = ["Record_Creation", "Frequent_Flier_Number", "Join_Date", "First_Name", "Last_Name", "Inquiry_Type", "Most_Recent_Flight_Number", "Lounge_Used", "Departing_Airport", "Arrival_Airport", "Planned_Snack", "Additional_Snack", "Included_Checked_Bags", "Total_Checked_Bags", "Flight_Delayed"]
delta_data_Cleaned = delta_data.drop(index=0)
delta_data_Cleaned.columns = new_columns
## Convert Join_Date to datetime format and handle any errors
delta_data_Cleaned['Join_Date'] = pd.to_datetime(delta_data_Cleaned['Join_Date'], errors='coerce', format='%Y-%M-%D')
## Clean and reset index for further analysis
delta_data_Cleaned = delta_data_Cleaned.reset_index(drop=True)
## Check for missing or inconsistent data
## Convert boolean-like columns to proper boolean values
delta_data_Cleaned['Lounge_Used'] = delta_data_Cleaned['Lounge_Used'].apply(lambda x: x == 'TRUE')
delta_data_Cleaned['Flight_Delayed'] = delta_data_Cleaned['Flight_Delayed'].apply(lambda x: x == 'TRUE')
## Convert numeric columns to numeric, handle errors
delta_data_Cleaned['Included_Checked_Bags'] = pd.to_numeric(delta_data_Cleaned['Included_Checked_Bags'], errors='coerce')
delta_data_Cleaned['Total_Checked_Bags'] = pd.to_numeric(delta_data_Cleaned['Total_Checked_Bags'], errors='coerce')
delta_data_Cleaned.head()
| Record_Creation | Frequent_Flier_Number | Join_Date | First_Name | Last_Name | Inquiry_Type | Most_Recent_Flight_Number | Lounge_Used | Departing_Airport | Arrival_Airport | Planned_Snack | Additional_Snack | Included_Checked_Bags | Total_Checked_Bags | Flight_Delayed | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2024-02-27 | 7234617746 | 1988/07/08 | debbie | spears | Flight Status | G35206241 | False | DFW | DEN | FALSE | TRUE | 0.0 | 16.0 | True |
| 1 | 2021-03-07 | 7234617746 | 1988/07/08 | debbie | Spears | New Flight | V81311927 | True | JFK | SFO | FALSE | TRUE | 1.0 | 21.0 | False |
| 2 | 2023-01-31 | 7234617746 | 07/08/1988 | debbie | spears | Cancel Flight | H82295055 | False | MIA | ORD | FALSE | FALSE | 7.0 | 5.0 | True |
| 3 | 2021-08-21 | 551121699 | 06/16/2022 | kyle | Boone | New Flight | E39886834 | True | ttt | NaN | FALSE | TRUE | NaN | 8.0 | False |
| 4 | 2022-03-01 | 551121699 | 16-06-2022 | KYLE | Boone | New Flight | R04390271 | False | SFO | DFW | FALSE | FALSE | 3.0 | 17.0 | False |
## Customer Segmentation
## Group by Frequent Flyer Number to calculate flight frequency, lounge usage, and other relevant metrics
## Count the number of flights per customer
## Sum of lounge usages (True = 1, False = 0)
## Sum of total checked bags
## Count number of delayed flights
customer_segmentation = delta_data_Cleaned.groupby('Frequent_Flier_Number').agg({'Record_Creation': 'count', 'Lounge_Used': lambda x: x.sum(), 'Total_Checked_Bags': 'sum', 'Flight_Delayed': lambda x: x.sum()}).reset_index()
## Lets rename columns for clarity
customer_segmentation.columns = ['Frequent_Flier_Number', 'Flight_Frequency', 'Lounge_Users', 'Total_Checked_Bags', 'Delayed_Flights']
print(customer_segmentation.head())
Frequent_Flier_Number Flight_Frequency Lounge_Users Total_Checked_Bags \ 0 1007638577 5 3 45.0 1 1007847878 9 8 85.0 2 1013150620 4 2 35.0 3 1015849337 5 3 53.0 4 1026781405 4 4 54.0 Delayed_Flights 0 2 1 7 2 1 3 2 4 3
customer_segmentation.head()
| Frequent_Flier_Number | Flight_Frequency | Lounge_Users | Total_Checked_Bags | Delayed_Flights | |
|---|---|---|---|---|---|
| 0 | 1007638577 | 5 | 3 | 45.0 | 2 |
| 1 | 1007847878 | 9 | 8 | 85.0 | 7 |
| 2 | 1013150620 | 4 | 2 | 35.0 | 1 |
| 3 | 1015849337 | 5 | 3 | 53.0 | 2 |
| 4 | 1026781405 | 4 | 4 | 54.0 | 3 |
## Flight Delay Analysis by Departure and Arrival Airports
## Calculate the number of delayed flights by departing airport
## Count delayed flights
## Total number of flights
delays_by_departure_airport = delta_data_Cleaned.groupby('Departing_Airport').agg({'Flight_Delayed': lambda x: x.sum(), 'Record_Creation': 'count'}).reset_index()
## Calculate the percentage of delays at each departing airport
## Rename columns
## Sort by highest percentage of delays
delays_by_departure_airport['Delay_Percentage'] = (delays_by_departure_airport['Flight_Delayed'] / delays_by_departure_airport['Record_Creation']) * 100
delays_by_departure_airport.columns = ['Departing_Airport', 'Delayed_Flights', 'Total_Flights', 'Delay_Percentage']
delays_by_departure_airport = delays_by_departure_airport.sort_values(by='Delay_Percentage', ascending=False)
print(delays_by_departure_airport.head())
Departing_Airport Delayed_Flights Total_Flights Delay_Percentage 6 MIA 426 785 54.267516 3 DFW 425 822 51.703163 9 SFO 418 811 51.541307 2 DEN 406 799 50.813517 7 ORD 434 858 50.582751
print(delta_data_Cleaned.columns)
Index(['Record_Creation', 'Frequent_Flier_Number', 'Join_Date', 'First_Name',
'Last_Name', 'Inquiry_Type', 'Most_Recent_Flight_Number', 'Lounge_Used',
'Departing_Airport', 'Arrival_Airport', 'Planned_Snack',
'Additional_Snack', 'Included_Checked_Bags', 'Total_Checked_Bags',
'Flight_Delayed'],
dtype='object')
delta_data_Cleaned.columns = delta_data_Cleaned.columns.str.strip()
print(delta_data_Cleaned.columns.to_list())
['Record_Creation', 'Frequent_Flier_Number', 'Join_Date', 'First_Name', 'Last_Name', 'Inquiry_Type', 'Most_Recent_Flight_Number', 'Lounge_Used', 'Departing_Airport', 'Arrival_Airport', 'Planned_Snack', 'Additional_Snack', 'Included_Checked_Bags', 'Total_Checked_Bags', 'Flight_Delayed']
## Flight Delay Analysis by Arrival Airport
delays_by_arrival_airport = delta_data_Cleaned.groupby('Arrival_Airport').agg({'Flight_Delayed': lambda x: x.sum(), 'Record_Creation': 'count'}).reset_index()
delays_by_arrival_airport['Delay_Percentage'] = (delays_by_arrival_airport['Flight_Delayed'] / delays_by_arrival_airport['Record_Creation']) * 100
delays_by_arrival_airport.columns = ['Arrival_Airport', 'Delayed_Flights', 'Total_Flights', 'Delay_Percentage']
delays_by_arrival_airport = delays_by_arrival_airport.sort_values(by='Delay_Percentage', ascending=False)
print("Delays by Departure Airport:")
print(delays_by_departure_airport.head())
Delays by Departure Airport: Departing_Airport Delayed_Flights Total_Flights Delay_Percentage 6 MIA 426 785 54.267516 3 DFW 425 822 51.703163 9 SFO 418 811 51.541307 2 DEN 406 799 50.813517 7 ORD 434 858 50.582751
print("\nDelays by Arrival Airport:")
print(delays_by_arrival_airport.head())
Delays by Arrival Airport: Arrival_Airport Delayed_Flights Total_Flights Delay_Percentage 4 JFK 416 793 52.459016 8 SEA 434 841 51.605232 6 MIA 424 837 50.657109 1 BOS 407 804 50.621891 7 ORD 395 783 50.446999
## Route analysis by grouping both Departure and Arrival airports
## Count of delayed flights for each route
## Total number of flights for each route
## The delay percentage for each route
route_analysis = delta_data_Cleaned.groupby(['Departing_Airport', 'Arrival_Airport']).agg({'Flight_Delayed': lambda x: x.sum(), 'Record_Creation': 'count'}).reset_index()
route_analysis['Delay_Percentage'] = (route_analysis['Flight_Delayed'] / route_analysis['Record_Creation']) * 100
route_analysis.columns = ['Departing_Airport', 'Arrival_Airport', 'Delayed_Flight', 'Total_Flights', 'Delay_Percentage']
route_analysis = route_analysis.sort_values(by='Delay_Percentage', ascending=False)
route_analysis.head()
| Departing_Airport | Arrival_Airport | Delayed_Flight | Total_Flights | Delay_Percentage | |
|---|---|---|---|---|---|
| 33 | DFW | JFK | 54 | 81 | 66.666667 |
| 69 | MIA | YYY | 23 | 36 | 63.888889 |
| 60 | MIA | ATL | 48 | 76 | 63.157895 |
| 107 | ttt | ORD | 26 | 42 | 61.904762 |
| 97 | SFO | ORD | 44 | 72 | 61.111111 |
route_analysis.to_csv('route_analysis.csv', index=False)
## Descriptive Analysis
## Summary statistics for customer segmentation
## Summary statistics for flight delays by departure airport
## Summary statistics for flight delays by arrival airport
## Summary statistics for route delays
customer_stats = customer_segmentation.describe()
departure_delay_stats = delays_by_arrival_airport.describe()
arrival_delay_stats = delays_by_arrival_airport.describe()
route_delay_stats = route_analysis.describe()
print("Customer Segmentation Statistics:\n", customer_stats)
print("\nDeparture Airport Delay Statistics:\n", departure_delay_stats)
print("\nArrival Airport Delay Statistics:\n", arrival_delay_stats)
print("\nRoute Delay Statistics:\n", route_delay_stats)
Customer Segmentation Statistics:
Flight_Frequency Lounge_Users Total_Checked_Bags Delayed_Flights
count 2000.000000 2000.000000 2000.0 2000.000000
mean 4.364500 2.139500 0.0 2.173500
std 1.872537 1.427256 0.0 1.473935
min 1.000000 0.000000 0.0 0.000000
25% 3.000000 1.000000 0.0 1.000000
50% 4.000000 2.000000 0.0 2.000000
75% 5.000000 3.000000 0.0 3.000000
max 10.000000 9.000000 0.0 9.000000
Departure Airport Delay Statistics:
Delayed_Flights Total_Flights Delay_Percentage
count 11.000000 11.000000 11.000000
mean 397.363636 795.363636 49.966270
std 57.933190 115.577050 1.348114
min 226.000000 456.000000 47.846890
25% 403.500000 798.500000 49.129693
50% 415.000000 836.000000 49.561404
75% 420.000000 841.500000 50.639500
max 434.000000 872.000000 52.459016
Arrival Airport Delay Statistics:
Delayed_Flights Total_Flights Delay_Percentage
count 11.000000 11.000000 11.000000
mean 397.363636 795.363636 49.966270
std 57.933190 115.577050 1.348114
min 226.000000 456.000000 47.846890
25% 403.500000 798.500000 49.129693
50% 415.000000 836.000000 49.561404
75% 420.000000 841.500000 50.639500
max 434.000000 872.000000 52.459016
Route Delay Statistics:
Delayed_Flight Total_Flights Delay_Percentage
count 111.000000 111.000000 111.000000
mean 37.342342 74.891892 49.823391
std 10.295185 18.108939 6.482699
min 13.000000 29.000000 36.363636
25% 33.000000 72.000000 45.235394
50% 38.000000 80.000000 49.411765
75% 44.000000 86.000000 55.223214
max 58.000000 105.000000 66.666667
## Correlation Analysis between lounge usage and flight delays
correlation_lounge_delay = customer_segmentation['Lounge_Users'].corr(customer_segmentation['Delayed_Flights'])
print("\nCorrelation between Lounge Usage and Delayed Flights: ", correlation_lounge_delay)
Correlation between Lounge Usage and Delayed Flights: 0.4243717768707256
## Predictive Analysis for Delta Airlines using historical data to forecast trends such as passenger demand, flight delays, and potential disruptions like strike
## Using time series forecasting for passenger demand
## Grouping the cleaned dataset by 'Record_Creation' to forecast passenger demand
## Split data into training and testing sets
## Apply Holt-Winters Exponential Smoothing for forecasting
## Make predictions for the test set
from statsmodels.tsa.holtwinters import ExponentialSmoothing
delta_data_Cleaned['Record_Creation'] = pd.to_datetime(delta_data_Cleaned['Record_Creation'])
passenger_demand = delta_data_Cleaned.groupby('Record_Creation').size().resample('M').sum()
train_size = int(len(passenger_demand) * 0.8)
train_data, test_data = passenger_demand[:train_size], passenger_demand[train_size:]
model = ExponentialSmoothing(train_data, seasonal='add', seasonal_periods=12).fit()
predictions = model.forecast(len(test_data))
/Users/cactusjack/opt/anaconda3/lib/python3.8/site-packages/statsmodels/tsa/holtwinters/model.py:427: FutureWarning: After 0.13 initialization must be handled at model creation warnings.warn(
plt.figure(figsize=(10, 6))
plt.plot(train_data.index, train_data, label='Train Data')
plt.plot(test_data.index, test_data, label='Test Data')
plt.plot(predictions.index, predictions, label='Predicted Passenger Demand', color='red')
plt.title('Passenger Demand Forecast')
plt.xlabel('Date')
plt.ylabel('Passenger Count')
plt.legend()
plt.show()
covariance = delta_data_Cleaned[['Flight_Delayed', 'Lounge_Used',]].cov()
print("Covariance between Flight Delayed and Customer Lounge Usage:\n", covariance)
Covariance between Flight Delayed and Customer Lounge Usage:
Flight_Delayed Lounge_Used
Flight_Delayed 0.249465 -0.001003
Lounge_Used -0.001003 0.249038
print(delta_data_Cleaned[['Flight_Delayed','Lounge_Used']].isna().sum())
Flight_Delayed 530 Lounge_Used 524 dtype: int64
Cleaned_data = delta_data_Cleaned[['Flight_Delayed','Lounge_Used']].dropna()
Cleaned_data['Flight_Delayed'] = pd.to_numeric(Cleaned_data['Flight_Delayed'], errors='coerce')
Cleaned_data['Lounge_Used'] = pd.to_numeric(Cleaned_data['Lounge_Used'], errors='coerce')
cov_matrix = cov_matrix.fillna(0)
print(delta_data_Cleaned.columns)
Index(['Record_Creation', 'Frequent_Flier_Number', 'Join_Date', 'First_Name',
'Last_Name', 'Inquiry_Type', 'Most_Recent_Flight_Number', 'Lounge_Used',
'Departing_Airport', 'Arrival_Airport', 'Planned_Snack',
'Additional_Snack', 'Included_Checked_Bags', 'Total_Checked_Bags',
'Flight_Delayed'],
dtype='object')
delta_data_Cleaned['Flight_Delayed'] = pd.to_numeric(delta_data_Cleaned['Flight_Delayed'], errors='coerce')
delta_data_Cleaned['Lounge_Used'] = pd.to_numeric(delta_data_Cleaned['Lounge_Used'], errors='coerce')
delta_data_Cleaned = delta_data_Cleaned.dropna(subset=['Flight_Delayed', 'Lounge_Used'])
print(delta_data_Cleaned[['Flight_Delayed', 'Lounge_Used']])
Empty DataFrame Columns: [Flight_Delayed, Lounge_Used] Index: []
import plotly.express as px
import plotly.graph_objects as go
## Prepare data
passenger_trends = delta_data_Cleaned.groupby('Record_Creation').size().reset_index(name='Passenger_Count')
## Plot interactive line chart
fig = px.line(
passenger_trends,
x='Record_Creation',
y='Passenger_Count',
title='Passenger Demand Over Time',
labels={'Passenger_Count': 'Number of Passengers'},
template='plotly_dark')
fig.show()